In this exercise we will look at how to use the camera's projection matrix, building on the camera pose transformation from the previous post.
First, instead of computing the transformation matrix by hand as before, we will use the more convenient look_at
function: given the camera position, the target position, and the up direction, it returns the camera's transformation matrix.
def look_at(camera_pos, target_pos, up_vec):
    # Compute the forward, right, and up vectors and normalize them
    forward = target_pos - camera_pos
    forward = forward / np.linalg.norm(forward)
    right = np.cross(up_vec, forward)
    right = right / np.linalg.norm(right)
    # Recompute the true up vector so the three axes are orthogonal
    up = np.cross(forward, right)
    up = up / np.linalg.norm(up)
    # Assemble the rotation part: the columns are the camera axes in world coordinates
    rotation_matrix = np.eye(4, dtype=np.float32)
    rotation_matrix[:3, 0] = right    # First column is the right vector (x-axis)
    rotation_matrix[:3, 1] = up       # Second column is the up vector (y-axis)
    rotation_matrix[:3, 2] = forward  # Third column is the forward vector (z-axis)
    # Create the translation part (camera position in world space)
    translation_matrix = np.eye(4, dtype=np.float32)
    translation_matrix[:3, 3] = camera_pos  # Set camera position in the translation part
    # Combine the rotation and translation into the camera-to-world matrix
    camera_to_world_matrix = translation_matrix @ rotation_matrix
    return camera_to_world_matrix
The computation looks a bit involved, but it mainly boils down to finding the camera's right, up, and forward directions as unit vectors. Once two of them are known, the cross product gives the third, perpendicular one; the three vectors are then assembled into a rotation matrix, and adding the camera position yields the camera's transformation matrix.
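For example, here is a quick standalone check (a minimal sketch, not part of the original script) that the three axes returned by look_at are orthonormal and that the last column stores the camera position:

import numpy as np

camera_pos = np.array([3.0, 3.0, 3.0])
target_pos = np.array([0.0, 0.0, 0.0])
up_vec = np.array([0.0, 0.0, -1.0])
m = look_at(camera_pos, target_pos, up_vec)
R = m[:3, :3]
# The rotation block should satisfy R @ R.T == I and det(R) == 1
print(np.allclose(R @ R.T, np.eye(3), atol=1e-6))    # True
print(np.isclose(np.linalg.det(R), 1.0, atol=1e-6))  # True
# The last column is the camera position in world coordinates
print(m[:3, 3])  # [3. 3. 3.]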
Now suppose we add a 3D cube with side length 1, centered at the origin. We place the camera at camera_pos=(3, 3, 3)
and have it look at the origin (0, 0, 0).
In this world the +z direction points up, but we want the camera's y-axis to point downward (toward -z) after the transformation, so we set up_vec=(0, 0, -1).
With these inputs we obtain the camera's transformation matrix.
import sys
import cv2
import numpy as np
from PIL import Image
from vispy import app, scene, visuals
# Create canvas
canvas = scene.SceneCanvas(title="vispy tutorial", keys="interactive", show=True)
# Make color white
canvas.bgcolor = "white"
# Create view and set the viewing camera
view = canvas.central_widget.add_view()
view.camera = "turntable"
view.camera.fov = 50
view.camera.distance = 10

def create_frustum(aspect_ratio=1.3, camera_to_world=np.eye(4)):
    objects = []  # Record all the objects created in this function
    center = np.array([0, 0, 0])
    points = np.array([
        [0.5, 0.5, 1],
        [0.5, -0.5, 1],
        [-0.5, -0.5, 1],
        [-0.5, 0.5, 1],
    ])
    points[:, 0] *= aspect_ratio
    for i in range(4):
        line = scene.visuals.Line(pos=np.array([center, points[i]]), color="red", antialias=True, width=2, parent=view.scene)
        objects.append(line)
        line = scene.visuals.Line(pos=np.array([points[i], points[(i + 1) % 4]]), color="red", antialias=True, width=2, parent=view.scene)
        objects.append(line)
    camera_axis = scene.visuals.XYZAxis(parent=view.scene, width=2, antialias=True)
    objects.append(camera_axis)
    # Create the semi-transparent plane
    plane = scene.visuals.Polygon(pos=points, color=(1, 0, 0, 0.5), parent=view.scene)
    # Here the z-axis of the plane is ignored, so we need to translate it
    plane.transform = scene.transforms.MatrixTransform()
    plane.transform.translate([0, 0, 1])
    objects.append(plane)
    new_transform = scene.transforms.MatrixTransform()
    new_transform.matrix = camera_to_world.T  # NOTE: MatrixTransform expects the transposed (row-vector) layout, so we pass the transpose
    for object in objects:
        object.transform = new_transform * object.transform

def look_at(camera_pos, target_pos, up_vec):
    # Compute the forward, right, and up vectors and normalize them
    forward = target_pos - camera_pos
    forward = forward / np.linalg.norm(forward)
    right = np.cross(up_vec, forward)
    right = right / np.linalg.norm(right)
    # Recompute the real up vector
    up = np.cross(forward, right)
    up = up / np.linalg.norm(up)
    rotation_matrix = np.eye(4, dtype=np.float32)
    rotation_matrix[:3, 0] = right    # First column is the right vector (x-axis)
    rotation_matrix[:3, 1] = up       # Second column is the up vector (y-axis)
    rotation_matrix[:3, 2] = forward  # Third column is the forward vector (z-axis)
    # Create the translation part (camera position in world space)
    translation_matrix = np.eye(4, dtype=np.float32)
    translation_matrix[:3, 3] = camera_pos  # Set camera position in the translation part
    # Combine the rotation and translation into the world matrix
    camera_to_world_matrix = translation_matrix @ rotation_matrix
    return camera_to_world_matrix
camera_pos = np.array([3, 3, 3])
target_pos = np.array([0, 0, 0])
up_vec = np.array([0, 0, -1])
camera_to_world = look_at(camera_pos, target_pos, up_vec)
create_frustum(camera_to_world=camera_to_world)
world_axis = scene.visuals.XYZAxis(parent=view.scene, width=2, antialias=True)
world_cube = scene.visuals.Cube(size=1.0, edge_color=[0.0, 0.0, 0.0], color=[0.5, 0.5, 0.5], parent=view.scene)

if __name__ == "__main__":
    if sys.flags.interactive != 1:
        app.run()
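Before projecting, we can sanity-check the pose (a short sketch reusing the variables from the script above, not part of the original code): the third column of the rotation block is the camera's forward axis expressed in world coordinates, and it should point from the camera toward the cube at the origin.

# Forward axis of the camera, expressed in world coordinates
forward_world = camera_to_world[:3, 2]
# Direction from the camera toward the target, normalized
expected = (target_pos - camera_pos) / np.linalg.norm(target_pos - camera_pos)
print(np.allclose(forward_world, expected, atol=1e-6))  # Expected: True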
If we want to project this cube onto a 2D image plane, we can use the camera's projection matrix. First we define a project_to_camera
function that takes the camera's transformation matrix, the focal lengths, the principal point, and the image height and width. The rough steps are as follows:
def project_to_camera(camera_to_world, fx, fy, cx, cy, height, width):
    # Define the cube vertices and faces
    cube_vertices = np.array([
        [-0.5, -0.5, 0.5],
        [0.5, -0.5, 0.5],
        [0.5, 0.5, 0.5],
        [-0.5, 0.5, 0.5],
        [-0.5, -0.5, -0.5],
        [0.5, -0.5, -0.5],
        [0.5, 0.5, -0.5],
        [-0.5, 0.5, -0.5],
    ])
    cube_faces = np.array([
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [0, 1, 5, 4],
        [2, 3, 7, 6],
        [0, 3, 7, 4],
        [1, 2, 6, 5],
    ])
    world_to_camera = np.linalg.inv(camera_to_world)
    # Create the intrinsic matrix
    intrinsic_matrix = np.eye(4)
    intrinsic_matrix[0, 0] = fx
    intrinsic_matrix[1, 1] = fy
    intrinsic_matrix[0, 2] = cx
    intrinsic_matrix[1, 2] = cy
    # Build the homogeneous coordinates from the vertices
    points = np.ones((cube_vertices.shape[0], 4))
    points[:, :3] = cube_vertices
    #### KEY PART ####
    # Transform the vertices from world coordinates into the camera frame
    points_in_camera_frame = world_to_camera @ points.T
    # Apply the intrinsics, then divide by depth (perspective division)
    points_projected = intrinsic_matrix @ points_in_camera_frame
    points_projected = points_projected.T
    points_projected = points_projected[:, :2] / (points_projected[:, 2:3] + 1e-6)
    ##################
    # Draw the cube edges in the image
    image = np.zeros((height, width, 3), dtype=np.uint8)
    for face in cube_faces:
        for i in range(4):
            start = points_projected[face[i]]
            end = points_projected[face[(i + 1) % 4]]
            start = start.astype(np.int32)
            end = end.astype(np.int32)
            image = cv2.line(image, tuple(start), tuple(end), (255, 255, 255), 2, cv2.LINE_AA)
    image = Image.fromarray(image)
    image.save("cube.png")

project_to_camera(
    camera_to_world=camera_to_world,
    fx=400,
    fy=400,
    cx=320,
    cy=240,
    height=480,
    width=640,
)
Running this produces the 2D projected image:
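As an extra sanity check (a minimal sketch reusing the same intrinsics, fx = fy = 400, cx = 320, cy = 240), a single cube vertex can also be projected by hand with the pinhole formula u = fx * X / Z + cx, v = fy * Y / Z + cy, where (X, Y, Z) is the vertex in camera coordinates:

world_to_camera = np.linalg.inv(camera_to_world)
vertex = np.array([0.5, 0.5, 0.5, 1.0])  # One cube corner in homogeneous coordinates
X, Y, Z, _ = world_to_camera @ vertex    # The same corner expressed in the camera frame
u = 400 * X / Z + 320
v = 400 * Y / Z + 240
print(u, v)  # Should match the corresponding corner drawn in cube.png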